library(socviz)
library(lubridate)
library(geofacet)
library(ggthemes)
library(ggrepel)
library(ggridges)
library(plyr)
library(skimr)
library(tidyverse)
library(gganimate)
library(plotly)
theme_set(theme_minimal())


Data Wrangling



Happiness Data

# Read 2015 Data
# Load the 2015 report, stamp each row with its survey year, and shorten the
# verbose column headers to the names used in the rest of the analysis.
h15 <- read_csv("Happiness_Data/2015.csv") %>%
  dplyr::mutate(Year = 2015) %>%
  dplyr::rename(
    H_rank = `Happiness Rank`,
    H_score = `Happiness Score`,
    GDP = `Economy (GDP per Capita)`,
    Health = `Health (Life Expectancy)`,
    Trust = `Trust (Government Corruption)`,
    SE = `Standard Error`,
    dystopia_res = `Dystopia Residual`
  )


# Read 2016 Data
# This file carries confidence-interval bounds instead of a standard error.
# Treating the bounds as a 95% CI, SE = (upper limit - lower limit) / 3.92
# (3.92 = 2 * 1.96). The bound columns are dropped once SE is derived.
h16 <- read_csv("Happiness_Data/2016.csv") %>%
  dplyr::mutate(
    Year = 2016,
    `Standard Error` =
      (`Upper Confidence Interval` - `Lower Confidence Interval`) / 3.92
  ) %>%
  dplyr::select(
    -`Upper Confidence Interval`,
    -`Lower Confidence Interval`
  ) %>%
  dplyr::rename(
    H_rank = `Happiness Rank`,
    H_score = `Happiness Score`,
    GDP = `Economy (GDP per Capita)`,
    Health = `Health (Life Expectancy)`,
    Trust = `Trust (Government Corruption)`,
    SE = `Standard Error`,
    dystopia_res = `Dystopia Residual`
  )



# The files from 2017 onwards have no 'Region' variable, so keep the
# Country -> Region pairs from the 2016 data as a lookup table that the
# later years are merged against.
h_regions <- h16 %>%
  dplyr::select(Country, Region)



# Read 2017 Data
# The 2017 file has dotted column names, no Region column, and whisker bounds
# instead of a standard error. SE is derived from the whiskers with the same
# (high - low) / 3.92 rule used for the 2016 confidence interval, and Region
# is attached from the 2016 lookup (countries without a match get NA).
h17 <- read_csv("Happiness_Data/2017.csv")
h17 <- h17 %>%
  dplyr::mutate(Year = 2017,
                `Standard Error` = (`Whisker.high` - `Whisker.low`) / 3.92) %>%
  # all.x = TRUE keeps every 2017 country even when h_regions has no entry.
  merge(h_regions, by = "Country", all.x = TRUE) %>%
  dplyr::select(-c(`Whisker.high`, `Whisker.low`)) %>%
  dplyr::rename(H_rank = `Happiness.Rank`, # Modify variable names
                H_score = Happiness.Score,
                GDP = Economy..GDP.per.Capita.,
                Health = Health..Life.Expectancy.,
                Trust = Trust..Government.Corruption.,
                SE = `Standard Error`,
                dystopia_res = Dystopia.Residual)


# Read 2018 Data
# The 2018 file uses a different naming scheme and has neither a Region nor a
# dystopia-residual column. Region comes from the 2016 lookup; the residual is
# rebuilt as the score minus the sum of the six explanatory components.
h18 <- read_csv("Happiness_Data/2018.csv")
h18 <- h18 %>%
  dplyr::mutate(Year = 2018) %>%
  dplyr::rename(H_rank = `Overall rank`, # Modify variable names
                H_score = `Score`,
                GDP = `GDP per capita`,
                Country = `Country or region`,
                Health = `Healthy life expectancy`,
                Trust = `Perceptions of corruption`,
                Freedom = `Freedom to make life choices`,
                Family = `Social support`) %>%
  # all.x = TRUE keeps every 2018 country even when h_regions has no entry.
  merge(h_regions, by = "Country", all.x = TRUE) %>%
  dplyr::mutate(
    # Convert Trust once and store it, so the column itself is numeric (not
    # only the value used in the residual) and stacks cleanly with the numeric
    # Trust columns of the other years. Non-numeric entries become NA.
    # NOTE(review): presumably the raw column contains a non-numeric
    # placeholder, which is why the coercion is needed - confirm.
    Trust = as.numeric(Trust),
    dystopia_res = H_score -
      (GDP + Family + Health + Freedom + Generosity + Trust)
  )



# Read 2019 Data
# Same layout as the 2018 file: rename to the shared short names, attach
# Region from the 2016 lookup, and rebuild the dystopia residual as the score
# minus the sum of the six explanatory components.
h19 <- read_csv("Happiness_Data/2019.csv")
h19 <- h19 %>%
  dplyr::mutate(Year = 2019) %>%
  dplyr::rename(H_rank = `Overall rank`, # Modify variable names
                H_score = `Score`,
                GDP = `GDP per capita`,
                Country = `Country or region`,
                Health = `Healthy life expectancy`,
                Trust = `Perceptions of corruption`,
                Freedom = `Freedom to make life choices`,
                Family = `Social support`) %>%
  # all.x = TRUE keeps every 2019 country even when h_regions has no entry.
  merge(h_regions, by = "Country", all.x = TRUE) %>%
  dplyr::mutate(
    # Store the numeric conversion so the Trust column itself is numeric, not
    # just the value used in the residual; a no-op if it was parsed as numeric.
    Trust = as.numeric(Trust),
    dystopia_res = H_score -
      (GDP + Family + Health + Freedom + Generosity + Trust)
  )

# Combine all data into h_alldat
# rbind.fill() stacks the five yearly tables by column name and fills columns
# a given year lacks with NA. Country is lower-cased so it can be matched
# against the lower-cased names used by the other data sources, then both
# identifiers are stored as factors.
h_alldat <- rbind.fill(h15, h16, h17, h18, h19) %>%
  tibble() %>%
  dplyr::mutate(
    Country = as.factor(tolower(Country)),
    Region = as.factor(Region)
  )

#rmarkdown::paged_table(h_alldat)

# Descriptive statistics of happiness rank and score, broken down by Region,
# rendered as a table (no between-group test requested).
knitr::kable(
  papeR::summarize_numeric(
    h_alldat,
    type = "numeric",
    group = "Region",
    variables = c("H_rank", "H_score"),
    test = FALSE
  )
)
Region N Mean SD Min Q1 Median Q3 Max
1 H_rank Australia and New Zealand 10 9.10 1.10 8.00 8.00 9.00 10.00 11.00
1.1 Central and Eastern Europe 144 75.63 26.80 20.00 55.50 73.00 91.50 138.00
1.2 Eastern Asia 28 66.32 22.54 25.00 52.00 65.00 83.50 101.00
1.3 Latin America and Caribbean 109 49.75 29.97 12.00 28.00 43.00 63.00 148.00
1.4 Middle East and Northern Africa 96 79.56 41.40 11.00 39.00 83.00 109.00 156.00
1.5 North America 10 11.30 5.14 5.00 7.00 11.00 15.00 19.00
1.6 Southeastern Asia 44 80.55 35.45 22.00 46.50 81.50 107.00 145.00
1.7 Southern Asia 35 112.46 23.31 67.00 97.00 115.00 127.50 154.00
1.8 Sub-Saharan Africa 185 126.86 21.41 55.00 114.00 131.00 143.00 158.00
1.9 Western Europe 103 26.12 26.36 1.00 6.00 17.00 36.00 102.00
2 H_score Australia and New Zealand 10 7.29 0.03 7.23 7.28 7.30 7.31 7.33
2.1 Central and Eastern Europe 144 5.43 0.59 4.10 5.13 5.50 5.85 6.85
2.2 Eastern Asia 28 5.63 0.47 4.87 5.25 5.65 5.92 6.45
2.3 Latin America and Caribbean 109 6.02 0.73 3.58 5.74 6.12 6.48 7.23
2.4 Middle East and Northern Africa 96 5.34 1.02 3.01 4.69 5.27 6.23 7.28
2.5 North America 10 7.17 0.20 6.89 6.99 7.20 7.33 7.43
2.6 Southeastern Asia 44 5.34 0.80 3.82 4.75 5.27 6.04 6.80
2.7 Southern Asia 35 4.58 0.59 3.20 4.34 4.57 5.05 5.65
2.8 Sub-Saharan Africa 185 4.21 0.59 2.84 3.78 4.22 4.57 5.89
2.9 Western Europe 103 6.76 0.75 4.86 6.34 6.94 7.41 7.77


Death & Risk Factors Data

# Read data in
# Path is relative to the project directory, matching the yearly happiness
# CSVs, so the script no longer depends on one machine's mounted volume.
death_dat <- read_csv("Happiness_Data/number-of-deaths-by-risk-factor.csv")

# Keep the years after 2015 and order the rows chronologically.
# NOTE(review): `Year > 2015` excludes 2015 although the happiness data
# starts in 2015 - confirm whether `>=` was intended.
death_dat <- death_dat %>%
  dplyr::filter(Year > 2015) %>%
  dplyr::arrange(Year)

rmarkdown::paged_table(death_dat)


Country Profile UN Data

# Path is relative to the project directory, matching the yearly happiness
# CSVs, so the script no longer depends on one machine's mounted volume.
country_profile <- read_csv(
  "Happiness_Data/kiva_country_profile_variables.csv"
)

# Lower-case the country names and rename the key to `Country` so it lines up
# with h_alldat; drop this table's own Region column so the merge below does
# not produce a second, conflicting Region.
country_profile <- country_profile %>%
  dplyr::mutate(country = tolower(country)) %>%
  dplyr::rename(Country = country) %>%
  dplyr::select(-Region)

# merge() defaults to an inner join: only countries present in both tables
# are kept in h_p_alldat.
h_p_alldat <- merge(h_alldat, country_profile, by = "Country")

rmarkdown::paged_table(country_profile)


TOP 10 AVG Happiness Scores (2015 ~ 2019)

# Get the 10 countries with the best (lowest) mean happiness rank over
# 2015 ~ 2019.
#
# A lower rank means a happier country, so rows are sorted ascending and the
# first ten are kept. Selecting by position rather than by the threshold
# `mean_rank <= 10` guarantees exactly ten rows (when at least ten countries
# exist) regardless of how the averages fall.
top_10 <- h_alldat %>%
  dplyr::group_by(Country) %>%
  dplyr::summarise(mean_rank = mean(H_rank, na.rm = TRUE)) %>%
  dplyr::arrange(mean_rank) %>%
  head(10)

rmarkdown::paged_table(top_10)






Visualizations



Boxplot of Happiness Scores in different Regions

# Distribution of happiness scores per region, with regions ordered by their
# average score. Rows without a Region (countries missing from the 2016
# lookup) are excluded explicitly via is.na() rather than by comparing
# against the string "NA".
ggplot(dplyr::filter(h_alldat, !is.na(Region))) +
  geom_boxplot(aes(x = H_score, y = reorder(Region, H_score), color = Region)) +
  theme_classic() +
  # The y axis already names each region, so the colour legend is redundant.
  theme(legend.position = "none") +
  labs(x = "Happiness Scores", y = "Regions")


# Happiness score against the GDP component, coloured by region. Rows without
# a Region are excluded explicitly via is.na() rather than by comparing
# against the string "NA".
ggplot(dplyr::filter(h_alldat, !is.na(Region)),
       aes(x = GDP, y = H_score, color = Region)) +
  geom_point() +
  theme_classic()


# Base plotly canvas: GDP component vs. happiness score, with the country
# name as the only hover text.
base <- plot_ly(
  data = h_alldat,
  x = ~GDP,
  y = ~H_score,
  text = ~Country,
  hoverinfo = "text",
  width = 800,
  height = 500
)

# One animation frame per Year; `ids` ties each marker to its country so the
# same point is tracked across frames. The play button sits at the
# bottom-right corner and the slider label reads "YEAR <value>" in red.
base %>%
  add_markers(color = ~Region, frame = ~Year, ids = ~Country) %>%
  animation_opts(frame = 1000, easing = "elastic", redraw = FALSE) %>%
  animation_button(x = 1, xanchor = "right", y = 0, yanchor = "bottom") %>%
  animation_slider(
    currentvalue = list(prefix = "YEAR ", font = list(color = "red"))
  )




World Map Data

# World polygon data, keyed by a lower-cased `Country` column so it can be
# joined to h_alldat. Three map names are rewritten to the spelling used in
# the happiness data.
# NOTE(review): other country names may also differ between the two sources
# (e.g. how the map data labels the United Kingdom) - confirm coverage.
world_map <- map_data("world")
world <- world_map %>%
  dplyr::rename(Country = region) %>%
  dplyr::mutate(
    Country = str_to_lower(Country),
    Country = dplyr::case_when(
      Country == "usa" ~ "united states",
      Country == "democratic republic of the congo" ~ "congo (kinshasa)",
      Country == "republic of congo" ~ "congo (brazzaville)",
      TRUE ~ Country
    ),
    Country = as.factor(Country)
  )

# Attach the map polygons to every country-year row. left_join() already keeps
# all rows of h_alldat, so the merge()-style `all.x` argument is not needed
# (and is not a left_join() parameter).
h_alldat_world <- left_join(h_alldat, world, by = "Country")

# Choropleth of happiness score: white for low scores, orange for high ones,
# and no fill where the score is missing. `frame = Year` is carried in the
# aesthetics so that ggplotly() can animate the map year by year.
p <- ggplot(
  data = h_alldat_world,
  mapping = aes(
    x = long, y = lat, group = group,
    fill = H_score,
    frame = Year
  )
) +
  geom_polygon(na.rm = TRUE) +
  scale_fill_gradient(low = "white", high = "#FD8104", na.value = NA) +
  theme_map()

# Convert to an interactive plotly widget and configure the animation timing.
animation_opts(
  plotly::ggplotly(p),
  frame = 1000,
  transition = 0,
  easing = "elastic",
  redraw = FALSE
)